import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# Read the advertising spreadsheet into a DataFrame. The rest of the script
# uses its "TV", "Radio", "Newspaper" and "Sales" columns.
data = pd.read_excel(io=r"advertising_data.xlsx")

# Preview the first five rows (five is also the default for DataFrame.head).
print(data.head(n=5))
# Output:
#       TV  Radio  Newspaper  Sales
# 0  230.1   37.8       69.2   22.1
# 1   44.5   39.3       45.1   10.4
# 2   17.2   45.9       69.3   12.0
# 3  151.5   41.3       58.5   16.5
# 4  180.8   10.8       58.4   17.9
# Report how many missing values each column contains. `isna` is the same
# check as `isnull` (the two are aliases), and axis=0 sums down each column.
print(data.isna().sum(axis=0))
# Output:
# TV           0
# Radio        0
# Newspaper    0
# Sales        0
# dtype: int64
import plotly.express as px
import plotly.graph_objects as go
# Draw one scatter plot per advertising channel, in the order TV, Newspaper,
# Radio. Each plot puts "Sales" on the x-axis and the channel's spend on the
# y-axis, scales the marker size by that same spend, and overlays an
# ordinary-least-squares trendline.
for channel in ("TV", "Newspaper", "Radio"):
    figure = px.scatter(
        data_frame=data,
        x="Sales",
        y=channel,
        size=channel,
        trendline="ols",
    )
    figure.show()
# Pairwise Pearson correlation between all columns (Pearson is the default
# method of DataFrame.corr).
correlation = data.corr(method="pearson")

# Show how strongly each column correlates with "Sales", strongest first.
print(correlation.loc[:, "Sales"].sort_values(ascending=False))
# Output:
# Sales        1.000000
# TV           0.901208
# Radio        0.349631
# Newspaper    0.157960
# Name: Sales, dtype: float64
# Split the dataset into predictors and target.
#
# x: every column except "Sales" (TV, Radio, Newspaper), as a 2-D array.
# y: the "Sales" column, as a 1-D array.
#
# The column is dropped with the `columns=` keyword. The earlier positional
# form `data.drop(["Sales"], 1)` triggered a pandas FutureWarning ("all
# arguments of DataFrame.drop except for the argument 'labels' will be
# keyword-only") and raises a TypeError from pandas 2.0 onward.
x = np.array(data.drop(columns=["Sales"]))
y = np.array(data["Sales"])

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, reproducible between runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)
# Fit an ordinary least-squares linear regression on the training split.
# `fit` returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(xtrain, ytrain)

# Evaluate on the held-out split; for a regressor, `score` reports the
# coefficient of determination (R^2).
print(model.score(xtest, ytest))
# Output: 0.9059011844150826
# Predict sales for one observation. The values must be given in the same
# order as the training columns: [TV, Radio, Newspaper].
# reshape(1, -1) turns the flat vector into the single-row 2-D array that
# `predict` expects (one row per sample).
features = np.array([230.1, 37.8, 69.2]).reshape(1, -1)
print(model.predict(features))
# Output: [21.37254028]